library(AER)
## Zorunlu paket yükleniyor: car
## Zorunlu paket yükleniyor: carData
## Zorunlu paket yükleniyor: lmtest
## Zorunlu paket yükleniyor: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
## Zorunlu paket yükleniyor: sandwich
## Zorunlu paket yükleniyor: survival
library(TSA)
## 
## Attaching package: 'TSA'
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(forecast)
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
## Registered S3 methods overwritten by 'forecast':
##   method       from
##   fitted.Arima TSA 
##   plot.Arima   TSA
library(tseries)
library(pdR)
library(gridExtra)
library(caschrono)
library(datasets)
library(readxl)
library(anomalize)
library(tidyverse)
## ── Attaching core tidyverse packages ──────────────────────── tidyverse 2.0.0 ──
## ✔ dplyr     1.1.3     ✔ readr     2.1.4
## ✔ forcats   1.0.0     ✔ stringr   1.5.0
## ✔ ggplot2   3.4.4     ✔ tibble    3.2.1
## ✔ lubridate 1.9.3     ✔ tidyr     1.3.0
## ✔ purrr     1.0.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::combine() masks gridExtra::combine()
## ✖ dplyr::filter()  masks stats::filter()
## ✖ dplyr::lag()     masks stats::lag()
## ✖ dplyr::recode()  masks car::recode()
## ✖ purrr::some()    masks car::some()
## ✖ readr::spec()    masks TSA::spec()
## ℹ Use the conflicted package (<http://conflicted.r-lib.org/>) to force all conflicts to become errors
library(dplyr)
library(caschrono)
library(plotly)
## 
## Attaching package: 'plotly'
## 
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## 
## The following object is masked from 'package:stats':
## 
##     filter
## 
## The following object is masked from 'package:graphics':
## 
##     layout
library(hrbrthemes)
## NOTE: Either Arial Narrow or Roboto Condensed fonts are required to use these themes.
##       Please use hrbrthemes::import_roboto_condensed() to install Roboto Condensed and
##       if Arial Narrow is not on your system, please see https://bit.ly/arialnarrow

Data Import

setwd("C:/Users/kagan/Desktop")
options(scipen = 999)


df <- read_excel("ımport.xls")
data <- ts(df$obs,start = c(1989,1),end = c(2023,9),frequency = 12)
df$date <- as.Date(df$date,format="%Y-%m-%d")

Time series plot and descriptives

cbind(head(df),"  ",tail(df))
##         date     obs "  "       date      obs
## 1 1989-01-01 37513.2      2023-04-01 260996.7
## 2 1989-02-01 38561.7      2023-05-01 254134.3
## 3 1989-03-01 39724.7      2023-06-01 251039.7
## 4 1989-04-01 38664.7      2023-07-01 256218.4
## 5 1989-05-01 40909.6      2023-08-01 253667.5
## 6 1989-06-01 39780.9      2023-09-01 260612.7
sum(is.na(data))
## [1] 0
summary(data)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   37513   73623  139189  135539  189809  289568
sd(data)
## [1] 66574
head(df)
## # A tibble: 6 × 2
##   date          obs
##   <date>      <dbl>
## 1 1989-01-01 37513.
## 2 1989-02-01 38562.
## 3 1989-03-01 39725.
## 4 1989-04-01 38665.
## 5 1989-05-01 40910.
## 6 1989-06-01 39781.
tail(data)
##           Apr      May      Jun      Jul      Aug      Sep
## 2023 260996.7 254134.3 251039.7 256218.4 253667.5 260612.7
options(scipen = 999)

ts_plot <- df %>%
  ggplot( aes(x=date, y=obs)) +
  geom_area(fill="grey", alpha=0.5) +
  geom_line(color="grey") +
  labs(title="Time Series Plot of U.S. Imports of \nGoods by Customs Basis from World",
       x="Year",y="Millons of Dollars")+
  theme_minimal()

ts_plot <- ggplotly(ts_plot)
ts_plot

Descriptive Plots

data2<- df %>% dplyr::mutate(year = lubridate::year(date), month = lubridate::month(date))
data2 <- data2[,-1]
head(data2)
## # A tibble: 6 × 3
##      obs  year month
##    <dbl> <dbl> <dbl>
## 1 37513.  1989     1
## 2 38562.  1989     2
## 3 39725.  1989     3
## 4 38665.  1989     4
## 5 40910.  1989     5
## 6 39781.  1989     6
data2$year <- as.factor(data2$year)
data2$month <- as.factor(data2$month)

bp_month <- ggplot(data2,aes(x=month,y=obs,fill=month))+
  geom_boxplot()+
  labs(title="Boxplot Across Months",x="Months",y="Millons of Dollars")+
  theme_minimal()+
  guides(fill = FALSE)
## Warning: The `<scale>` argument of `guides()` cannot be `FALSE`. Use "none" instead as
## of ggplot2 3.3.4.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
bp_month

bp_year <- ggplot(data2,aes(x=year,y=obs,fill=year))+
  geom_boxplot()+
  labs(title="Boxplot Across Years",x="Years",y="Millons of Dollars")+
  guides(fill = FALSE)+
  scale_x_discrete(breaks = seq(1989, 2023, 4), labels = seq(1989, 2023, 4))
bp_year

seasonal_check <- ggplot(data2, aes(as.numeric(as.character(month)), obs))+
  geom_line(aes(colour = year))+
  labs(title="Series by Years",x="",y="Millons of Dollars")+
  guides(colour = FALSE)
seasonal_check

grid.arrange(bp_month,bp_year,seasonal_check,ncol=1)

Cross validation

train <- head(data,-12)
test <- tail(data,12)

length(test)
## [1] 12

Anomaly Detection and Removing

train_for_anomaly <- head(df,-12)
class(train_for_anomaly)
## [1] "tbl_df"     "tbl"        "data.frame"
train_for_anomaly%>% 
  time_decompose(obs, method = "stl", frequency = "auto", trend = "auto") %>%
  anomalize(remainder, method = "gesd", alpha = 0.05, max_anoms = 0.2) %>%
  plot_anomaly_decomposition()
## Converting from tbl_df to tbl_time.
## Auto-index message: index = date
## frequency = 12 months
## trend = 60 months

train_for_anomaly %>% 
  time_decompose(obs) %>%
  anomalize(remainder) %>%
  time_recompose() %>%
  plot_anomalies(time_recomposed = TRUE, ncol = 3, alpha_dots = 0.5)
## Converting from tbl_df to tbl_time.
## Auto-index message: index = date
## frequency = 12 months
## trend = 60 months

library(knitr)


anomalized_set <- train_for_anomaly %>% 
  # 1. Decompose download counts and anomalize the STL decomposition remainder
  time_decompose(obs) %>%
  # 2. Fix negative values if any in observed
  mutate(observed = ifelse(observed < 0, 0, observed)) %>%
  # 3. Identify anomalies
  anomalize(remainder) %>%
  # 4. Clean & repair anomalous data
  clean_anomalies()
## Converting from tbl_df to tbl_time.
## Auto-index message: index = date
## frequency = 12 months
## trend = 60 months
anomalized_set
## # A time tibble: 405 × 9
## # Index:         date
##    date       observed  season  trend remainder remainder_l1 remainder_l2
##    <date>        <dbl>   <dbl>  <dbl>     <dbl>        <dbl>        <dbl>
##  1 1989-01-01   37513.  -336.  38814.   -965.        -18189.       18082.
##  2 1989-02-01   38562. -1054.  38915.    701.        -18189.       18082.
##  3 1989-03-01   39725.   655.  39016.     53.5       -18189.       18082.
##  4 1989-04-01   38665.    89.9 39117.   -542.        -18189.       18082.
##  5 1989-05-01   40910.   293.  39217.   1399.        -18189.       18082.
##  6 1989-06-01   39781.   342.  39318.    121.        -18189.       18082.
##  7 1989-07-01   38951.  -461.  39419.     -6.54      -18189.       18082.
##  8 1989-08-01   40117.   -78.5 39525.    671.        -18189.       18082.
##  9 1989-09-01   39233.    68.0 39631.   -466.        -18189.       18082.
## 10 1989-10-01   40961.   357.  39736.    868.        -18189.       18082.
## # ℹ 395 more rows
## # ℹ 2 more variables: anomaly <chr>, observed_cleaned <dbl>
anomalized_set %>% 
  filter(anomaly == "Yes") %>%
  select(date, anomaly, observed, observed_cleaned) %>%
  head() %>%
  kable()
date anomaly observed observed_cleaned
2008-02-01 Yes 179496 160304.7
2008-04-01 Yes 182776 160506.7
2008-05-01 Yes 183767 160238.9
2008-06-01 Yes 186963 159816.6
2008-07-01 Yes 194334 158542.2
2008-08-01 Yes 186261 159080.6
clean <- data.frame(date=anomalized_set$date,observations=anomalized_set$observed_cleaned)
class(clean)
## [1] "data.frame"
clean_ts <- ts(clean$observations,start = c(1989,1),end = c(2022,9),frequency = 12)
autoplot(clean_ts,col="black",ylab="Millions of Dollars",xlab="Date",
         main="Anomaly Cleaned Time Series Plot",lwd=1)+theme_minimal()+
  autolayer(train,color="red",lwd=1)

Box Cox transformation

BoxCox.ar(clean_ts,method = c("yule-walker"))
## Warning in arima0(x, order = c(i, 0L, 0L), include.mean = demean): possible
## convergence problem: optim gave code = 1

lambda <- BoxCox.lambda(clean_ts)
lambda
## [1] 0.1599589
trans <- BoxCox(clean_ts,lambda)

Trend determining

par(mfrow=c(1,2))
acf(as.vector(trans),main="ACF of Transformed TS")
pacf(as.vector(trans),main="PACF of Transformed TS")

#kpss test
kpss.test(trans,null="Level") #it says that series is not stationary
## Warning in kpss.test(trans, null = "Level"): p-value smaller than printed
## p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  trans
## KPSS Level = 6.603, Truncation lag parameter = 5, p-value = 0.01
kpss.test(trans,null="Trend") # it says that we have stochastic trend
## Warning in kpss.test(trans, null = "Trend"): p-value smaller than printed
## p-value
## 
##  KPSS Test for Trend Stationarity
## 
## data:  trans
## KPSS Trend = 1.3816, Truncation lag parameter = 5, p-value = 0.01
# adf test
mean(trans)
## [1] 34.20278
library(fUnitRoots)

adfTest(trans,lags=1,type = c("c"))
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -1.4309
##   P VALUE:
##     0.5251 
## 
## Description:
##  Sun Jan 21 21:28:39 2024 by user: kagan
adfTest(trans,lags=1,type = c("ct"))
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -1.2314
##   P VALUE:
##     0.9008 
## 
## Description:
##  Sun Jan 21 21:28:39 2024 by user: kagan

Detrending

ndiffs(trans)
## [1] 1
nsdiffs(trans)
## [1] 0
diff <- diff(trans)

kpss.test(diff,null=c("Level"))
## Warning in kpss.test(diff, null = c("Level")): p-value greater than printed
## p-value
## 
##  KPSS Test for Level Stationarity
## 
## data:  diff
## KPSS Level = 0.2168, Truncation lag parameter = 5, p-value = 0.1
mean(diff)
## [1] 0.02922785
adfTest(diff,lags=1,type = "nc")
## Warning in adfTest(diff, lags = 1, type = "nc"): p-value smaller than printed
## p-value
## 
## Title:
##  Augmented Dickey-Fuller Test
## 
## Test Results:
##   PARAMETER:
##     Lag Order: 1
##   STATISTIC:
##     Dickey-Fuller: -16.445
##   P VALUE:
##     0.01 
## 
## Description:
##  Sun Jan 21 21:28:39 2024 by user: kagan
# Differenced TS Plot

autoplot(diff,col="darkblue",main="Differenced Time Series Plot")+theme_minimal()

Model Suggestion

par(mfrow=c(1,2))
acf(as.vector(diff),main="ACF of Differenced TS")
pacf(as.vector(diff),main="PACF of Differenced TS")

# ARIMA(3,1,0)
# ARIMA(3,1,3)
# ARIMA(3,1,1)


armaselect(diff)
##       p q       sbc
##  [1,] 0 1 -1517.819
##  [2,] 0 3 -1515.804
##  [3,] 3 1 -1515.062
##  [4,] 1 1 -1514.228
##  [5,] 1 3 -1513.082
##  [6,] 0 2 -1512.495
##  [7,] 0 4 -1511.753
##  [8,] 2 2 -1510.896
##  [9,] 2 1 -1510.749
## [10,] 4 1 -1509.525
# ARIMA(0,1,1)

eacf(diff)
## AR/MA
##   0 1 2 3 4 5 6 7 8 9 10 11 12 13
## 0 x o x o o o o o o o o  o  x  o 
## 1 x o x o o o o o o o o  o  x  o 
## 2 x x x o o o o o o o o  o  x  o 
## 3 x o x o o o o o o o o  o  o  o 
## 4 x o o x o o o o o o o  o  o  o 
## 5 x o o x x o o o o o o  o  o  o 
## 6 x x x x o x o o o o o  o  x  o 
## 7 o o x x o x o o o o o  o  o  o
# ARIMA(0,1,3)

auto.arima(train)
## Series: train 
## ARIMA(0,1,3) with drift 
## 
## Coefficients:
##          ma1     ma2     ma3     drift
##       0.0448  0.1494  0.1919  571.1675
## s.e.  0.0498  0.0514  0.0479  262.4084
## 
## sigma^2 = 14672816:  log likelihood = -3904.62
## AIC=7819.25   AICc=7819.4   BIC=7839.25
# ARIMA(0,1,3)


fit1<-Arima(trans,order=c(3,1,0),method = "CSS-ML")  #model suggested by us
fit1
## Series: trans 
## ARIMA(3,1,0) 
## 
## Coefficients:
##           ar1     ar2     ar3
##       -0.2686  0.0307  0.2136
## s.e.   0.0486  0.0504  0.0486
## 
## sigma^2 = 0.02414:  log likelihood = 180.36
## AIC=-352.72   AICc=-352.62   BIC=-336.71
#significant
fit2<-Arima(trans,order=c(3,1,3),method = "CSS-ML")  #model suggested by us
fit2
## Series: trans 
## ARIMA(3,1,3) 
## 
## Coefficients:
##          ar1      ar2     ar3      ma1     ma2      ma3
##       0.7058  -0.2725  0.5655  -1.0074  0.5887  -0.5696
## s.e.  0.1503   0.2146  0.1234   0.1316  0.2448   0.1575
## 
## sigma^2 = 0.02335:  log likelihood = 187.99
## AIC=-361.97   AICc=-361.69   BIC=-333.96
#significant
fit3<-Arima(trans,order=c(3,1,1),method = "CSS-ML")  #model suggested by us
fit3
## Series: trans 
## ARIMA(3,1,1) 
## 
## Coefficients:
##          ar1     ar2     ar3      ma1
##       0.4638  0.2166  0.1779  -0.7793
## s.e.  0.1195  0.0571  0.0566   0.1169
## 
## sigma^2 = 0.02389:  log likelihood = 182.96
## AIC=-355.93   AICc=-355.78   BIC=-335.92
#significant
fit4<-Arima(trans,order=c(0,1,1),method = "CSS-ML")  #model suggested by armaselect
fit4
## Series: trans 
## ARIMA(0,1,1) 
## 
## Coefficients:
##           ma1
##       -0.2313
## s.e.   0.0414
## 
## sigma^2 = 0.0254:  log likelihood = 169.2
## AIC=-334.4   AICc=-334.37   BIC=-326.39
#significant
fit5<-Arima(trans,order=c(0,1,3),method = "CSS-ML")  #model suggested by eacf and autoarima
fit5
## Series: trans 
## ARIMA(0,1,3) 
## 
## Coefficients:
##           ma1     ma2     ma3
##       -0.2744  0.0880  0.1480
## s.e.   0.0503  0.0554  0.0486
## 
## sigma^2 = 0.02434:  log likelihood = 178.73
## AIC=-349.47   AICc=-349.36   BIC=-333.46
#significant
(min(cbind(fit1$bic,fit2$bic,fit3$bic,fit4$bic,fit5$bic)))
## [1] -336.7123
# fit1 which is ARIMA(3,1,0) has the lowest BIC value so we will choose it.

Diagnostic Checking

# Normality of Residuals
r <- rstandard(fit1)
head(r)
##             Jan        Feb        Mar        Apr        May        Jun
## 1989  0.1766732  0.9021252  1.2636755 -0.6185974  1.4861713 -0.6438361
r1 <- autoplot(r)+geom_line(y=0)+theme_minimal()+ggtitle("Plot of The Residuals")

r2 <- ggplot(r, aes(sample = r)) +stat_qq()+geom_qq_line()+ggtitle("QQ Plot of the Residuals")+theme_minimal()

r3 <- ggplot(r,aes(x=r))+geom_histogram(bins=20)+geom_density()+ggtitle("Histogram of Residuals")+theme_minimal()

grid.arrange(r1,r2,r3,ncol=1)
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.
## Don't know how to automatically pick scale for object of type <ts>. Defaulting
## to continuous.

jarque.bera.test(r)
## 
##  Jarque Bera Test
## 
## data:  r
## X-squared = 1755.3, df = 2, p-value < 0.00000000000000022
shapiro.test(r)
## 
##  Shapiro-Wilk normality test
## 
## data:  r
## W = 0.89013, p-value < 0.00000000000000022
# Detection of Serial Correlation 

g1 <- ggAcf(as.vector(r),main="ACF of the Residuals",lag = 72)+theme_minimal() #to see time lags, as. factor function is used.
## Warning in ggplot2::geom_segment(lineend = "butt", ...): Ignoring unknown
## parameters: `main`
g2 <- ggPacf(as.vector(r),main="PACF of the Residuals",lag = 72)+theme_minimal()
## Warning in ggplot2::geom_segment(lineend = "butt", ...): Ignoring unknown
## parameters: `main`
grid.arrange(g1,g2,ncol=2)

#Breusch- Godfrey Test
library(lmtest)

z <- lm(r~1+zlag(r))
bgtest(z,order=15)
## 
##  Breusch-Godfrey test for serial correlation of order up to 15
## 
## data:  z
## LM test = 19.927, df = 15, p-value = 0.1747
# Since p value is greater than alpha, we have 95% confident that the residuals of the 
# model are uncorrelated,according to results of Breusch-Godfrey Test.

#Box-Ljung Test

Box.test(r,lag=15,type=c("Ljung-Box"))
## 
##  Box-Ljung test
## 
## data:  r
## X-squared = 20.669, df = 15, p-value = 0.1477
# Since p value is greater than alpha, we have 95% confident that the 
# residuals of the model are uncorrelated, according to results of Box-Ljung Test.


# Heteroscedasticity

r_sq <- r^2
g1_sq<-ggAcf(as.vector(r_sq),lag.max = 72)+theme_minimal()+ggtitle("ACF of Squared Residuals")
g2_sq<-ggPacf(as.vector(r_sq),lag.max = 72)+theme_minimal()+ggtitle("PACF of Squared Residuals")  # heteroscedasticity check
grid.arrange(g1_sq,g2_sq,ncol=2)

library(FinTS)
## 
## Attaching package: 'FinTS'
## 
## The following object is masked from 'package:forecast':
## 
##     Acf
ArchTest(r)
## 
##  ARCH LM-test; Null hypothesis: no ARCH effects
## 
## data:  r
## Chi-squared = 70.626, df = 12, p-value = 0.0000000002445

Garch Forecast

library(rugarch)
## Zorunlu paket yükleniyor: parallel
## 
## Attaching package: 'rugarch'
## The following object is masked from 'package:purrr':
## 
##     reduce
## The following object is masked from 'package:stats':
## 
##     sigma
spec <- ugarchspec(
  variance.model = list(model = "sGARCH", garchOrder = c(1, 1)),
  mean.model = list(armaOrder = c(3, 0), include.mean = TRUE),
  distribution.model = "norm")

garch_model <- ugarchfit(spec, data = train)
## Warning in .makefitmodel(garchmodel = "sGARCH", f = .sgarchLLH, T = T, m = m, : 
## rugarch-->warning: failed to invert hessian
x <- residuals(garch_model)
jarque.bera.test(x)
## 
##  Jarque Bera Test
## 
## data:  x
## X-squared = 106635, df = 2, p-value < 0.00000000000000022
son <- ugarchforecast(garch_model,n.ahead=12)

library(TSstudio)

gf <- fitted(garch_model)
last <- gf[300:405]
last_ts <- xts_to_ts(last, frequency = NULL, start = NULL)

plot(son,which=1)
lines(test,col="red",lty=2 )
lines(last_ts,col="black")
abline(v=2022.700,col="red")

forecasted_values_garch <- as.numeric(son@forecast$seriesFor)

accuracy_g <- function(forecast, actual, insample) {
  residuals <- forecast - actual
  naive_forecast <- stats::lag(insample, k = 1)
  naive_forecast <- c(tail(naive_forecast, -1), NA)  # Align to the length of actual data
  naive_errors <- actual - naive_forecast
  naive_mae <- mean(abs(naive_errors[!is.na(naive_errors)]), na.rm = TRUE)
  return(data.frame(
    ME = mean(residuals, na.rm = TRUE),
    RMSE = sqrt(mean(residuals^2, na.rm = TRUE)),
    MAE = mean(abs(residuals), na.rm = TRUE),
    MAPE = mean(abs(residuals / actual), na.rm = TRUE) * 100,
    MASE = mean(abs(residuals), na.rm = TRUE) / naive_mae)
  )
}


accuracy_results_garch_test <- accuracy_g(forecasted_values_garch,df$obs[406:417], df$obs[1:405]) 
## Warning in actual - naive_forecast: uzun olan nesne uzunluğu kısa olan nesne
## uzunluğunun bir katı değil
fitted_values_garch <- fitted(garch_model)
accuracy_results_garch_train <- accuracy_g(fitted_values_garch,  df$obs[1:405],  df$obs[1:405])

ETS forecast

etsmodel <- ets(train,model="ZZZ")
etsmodel
## ETS(M,Ad,N) 
## 
## Call:
##  ets(y = train, model = "ZZZ") 
## 
##   Smoothing parameters:
##     alpha = 0.7911 
##     beta  = 0.2411 
##     phi   = 0.8 
## 
##   Initial states:
##     l = 38388.5474 
##     b = 226.9602 
## 
##   sigma:  0.0259
## 
##      AIC     AICc      BIC 
## 8906.914 8907.125 8930.937
ets_forecast <- forecast(etsmodel,h=12)

clrs <- c("red", "pink", "yellow", "steelblue")

autoplot(ets_forecast) +
  autolayer(ets_forecast$mean, series="Forecast") +
  autolayer(fitted(etsmodel), series='Fitted') + 
  autolayer(train, series = 'Train') +
  autolayer(test, series='Test', linetype="dashed") +
  xlab("Time") + ylab("Millions of Dollars")  + 
  guides(colour=guide_legend(title="Data series"), 
         fill=guide_legend(title="Prediction interval")) +
  scale_color_manual(values=clrs)+ geom_vline(xintercept = 2022.833,color="black")+
  theme_minimal()

accuracy(ets_forecast,test)
##                      ME     RMSE      MAE        MPE     MAPE      MASE
## Training set   314.8915 3883.749 2383.856  0.2595838 1.807799 0.1822393
## Test set     -6056.2630 7875.335 6962.084 -2.3836407 2.718762 0.5322325
##                     ACF1 Theil's U
## Training set  0.05181398        NA
## Test set     -0.16116411  1.113304
jarque.bera.test(residuals(etsmodel))
## 
##  Jarque Bera Test
## 
## data:  residuals(etsmodel)
## X-squared = 569.12, df = 2, p-value < 0.00000000000000022

TBATS Forecast

tbatsmodel<-tbats(train)
tbatsmodel
## BATS(0.002, {0,3}, 1, -)
## 
## Call: tbats(y = train)
## 
## Parameters
##   Lambda: 0.002488
##   Alpha: 1.603217
##   Beta: 0.007593507
##   Damping Parameter: 1
##   MA coefficients: -0.630339 0.534197 -0.111417
## 
## Seed States:
##              [,1]
## [1,] 10.676114523
## [2,]  0.008896374
## [3,]  0.000000000
## [4,]  0.000000000
## [5,]  0.000000000
## attr(,"lambda")
## [1] 0.002488298
## 
## Sigma: 0.02572335
## AIC: 8896.795
tbats_forecast <- forecast(tbatsmodel,h=12)


clrs <- c("red", "pink", "yellow", "steelblue")

autoplot(tbats_forecast) +
  autolayer(tbats_forecast$mean, series="Forecast") +
  autolayer(fitted(tbatsmodel), series='Fitted') + 
  autolayer(train, series = 'Train') +
  autolayer(test, series='Test', linetype="dashed") +
  xlab("Time") + ylab("Millions of Dollars")  + 
  guides(colour=guide_legend(title="Data series"), 
         fill=guide_legend(title="Prediction interval")) +
  scale_color_manual(values=clrs)+ geom_vline(xintercept = 2022.833,color="black")+
  theme_minimal()

accuracy(tbats_forecast,test)
##                       ME      RMSE       MAE        MPE     MAPE      MASE
## Training set   -211.3894  3848.676  2358.607 -0.1812168 1.773929 0.1803092
## Test set     -17967.9098 20098.467 18392.950 -7.0162789 7.172916 1.4060914
##                    ACF1 Theil's U
## Training set 0.06244265        NA
## Test set     0.43499867  2.904998
jarque.bera.test(residuals(tbatsmodel))
## 
##  Jarque Bera Test
## 
## data:  residuals(tbatsmodel)
## X-squared = 592.83, df = 2, p-value < 0.00000000000000022

Prophet Forecast

library(prophet)
## Zorunlu paket yükleniyor: Rcpp
## Zorunlu paket yükleniyor: rlang
## 
## Attaching package: 'rlang'
## The following objects are masked from 'package:purrr':
## 
##     %@%, flatten, flatten_chr, flatten_dbl, flatten_int, flatten_lgl,
##     flatten_raw, invoke, splice
ds<-c(seq(as.Date("1989/01/01"),as.Date("2022/9/01"),by="month"))
pp<-data.frame(ds,y=as.numeric(train))
head(pp)
##           ds       y
## 1 1989-01-01 37513.2
## 2 1989-02-01 38561.7
## 3 1989-03-01 39724.7
## 4 1989-04-01 38664.7
## 5 1989-05-01 40909.6
## 6 1989-06-01 39780.9
m <- prophet(pp)
## Disabling weekly seasonality. Run prophet with weekly.seasonality=TRUE to override this.
## Disabling daily seasonality. Run prophet with daily.seasonality=TRUE to override this.
future<-make_future_dataframe(m,periods = 12,freq='month') #periods 12, since it's a monthly series.
tail(future,24)
##             ds
## 394 2021-10-01
## 395 2021-11-01
## 396 2021-12-01
## 397 2022-01-01
## 398 2022-02-01
## 399 2022-03-01
## 400 2022-04-01
## 401 2022-05-01
## 402 2022-06-01
## 403 2022-07-01
## 404 2022-08-01
## 405 2022-09-01
## 406 2022-10-01
## 407 2022-11-01
## 408 2022-12-01
## 409 2023-01-01
## 410 2023-02-01
## 411 2023-03-01
## 412 2023-04-01
## 413 2023-05-01
## 414 2023-06-01
## 415 2023-07-01
## 416 2023-08-01
## 417 2023-09-01
forecast <- predict(m, future)
tail(forecast[c('ds', 'yhat', 'yhat_lower', 'yhat_upper')],12)
##             ds     yhat yhat_lower yhat_upper
## 406 2022-10-01 238573.0   221813.6   255149.6
## 407 2022-11-01 238962.4   222391.5   255820.1
## 408 2022-12-01 240076.1   222632.0   256268.1
## 409 2023-01-01 240322.6   223103.9   258040.5
## 410 2023-02-01 239737.8   223323.2   255355.8
## 411 2023-03-01 243032.6   226005.6   259310.2
## 412 2023-04-01 242130.9   224820.8   258532.5
## 413 2023-05-01 243165.3   226833.0   258911.3
## 414 2023-06-01 243730.6   227006.6   260841.5
## 415 2023-07-01 243707.7   226804.4   260362.0
## 416 2023-08-01 244772.9   227578.6   262116.2
## 417 2023-09-01 245975.6   229551.5   263816.8
plot(m, forecast)+theme_minimal()

prophet_plot_components(m, forecast)

# Extract predicted values from the forecast
predicted_values <- forecast$yhat

# Calculate residuals
residuals <- df$obs - predicted_values

jarque.bera.test(residuals)
## 
##  Jarque Bera Test
## 
## data:  residuals
## X-squared = 151.37, df = 2, p-value < 0.00000000000000022
a <- forecast$yhat
a <- tail(a,-12)
accuracy(a,test)
##                ME     RMSE      MAE      MPE     MAPE      ACF1 Theil's U
## Test set 221244.3 221337.5 221244.3 85.54106 85.54106 0.1576206   30.2831
accuracy(predicted_values,train)
##                 ME     RMSE      MAE        MPE     MAPE      ACF1 Theil's U
## Test set -1.359029 13229.52 9214.769 -0.6028779 7.017123 0.9514383  3.495323

NNETAR Forecast

fit <-  nnetar(train,lambda=0)
nnetar_forecast <- forecast(fit,h=12,PI=T)

options(scipen = 999)
clrs <- c("red", "pink", "yellow", "steelblue")

autoplot(nnetar_forecast) +
  autolayer(nnetar_forecast$mean, series="Forecast") +
  autolayer(fitted(fit), series='Fitted') + 
  autolayer(train, series = 'Train') +
  autolayer(test, series='Test', linetype="dashed") +
  xlab("Time") + ylab("Millions of Dollars")  + 
  guides(colour=guide_legend(title="Data series"), 
         fill=guide_legend(title="Prediction interval")) +
  scale_color_manual(values=clrs)+ geom_vline(xintercept = 2022.833,color="black")+
  theme_minimal()
## Warning: Removed 12 rows containing missing values (`geom_line()`).

accuracy(nnetar_forecast,test)
##                      ME     RMSE      MAE         MPE     MAPE      MASE
## Training set   45.95199 3964.377 2434.613 -0.03504184 1.775753 0.1861196
## Test set     2010.17502 6879.788 5266.111  0.76196600 2.036234 0.4025800
##                    ACF1 Theil's U
## Training set 0.03863883        NA
## Test set     0.23861275  0.976854
sef <- as.vector(residuals(fit))
jarque.bera.test(sef)
## 
##  Jarque Bera Test
## 
## data:  sef
## X-squared = 11932, df = 2, p-value < 0.00000000000000022